From 289d3e4f0000503fac5326e986f27e0b1d924c7f Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Sat, 27 Sep 2014 06:20:36 -0400 Subject: [PATCH] FormatJson::parse( TRY_FIXING ) - remove trailing commas Removes trailing commas from json text when parsing Solves very common cases like [1,2,3,] Resulting status will be set to OK but not Good to warn caller Change-Id: Ic0eb0a711da3ae578d6bb58d7474279d6845a4a7 --- includes/json/FormatJson.php | 31 +++++++++++++++- languages/i18n/en.json | 1 + languages/i18n/qqq.json | 1 + .../phpunit/includes/json/FormatJsonTest.php | 36 +++++++++++++++++++ 4 files changed, 68 insertions(+), 1 deletion(-) diff --git a/includes/json/FormatJson.php b/includes/json/FormatJson.php index 55656444c2..2dbbc3083c 100644 --- a/includes/json/FormatJson.php +++ b/includes/json/FormatJson.php @@ -61,7 +61,14 @@ class FormatJson { * * @since 1.24 */ - const FORCE_ASSOC = 1; + const FORCE_ASSOC = 0x1; + + /** + * If set, attempts to fix invalid json. + * + * @since 1.24 + */ + const TRY_FIXING = 0x2; /** * Regex that matches whitespace inside empty arrays and objects. @@ -149,6 +156,28 @@ class FormatJson { $result = json_decode( $value, $assoc ); $code = json_last_error(); + if ( $code === JSON_ERROR_SYNTAX && ( $options & self::TRY_FIXING ) !== 0 ) { + // The most common error is the trailing comma in a list or an object. + // We cannot simply replace /,\s*[}\]]/ because it could be inside a string value. + // But we could use the fact that JSON does not allow multi-line string values, + // And remove trailing commas if they are at the end of a line. + // JSON only allows 4 whitespace characters: [ \t\r\n]. So we must not use '\s' for matching. + // Regex match ,]\n or ,\n] with optional spaces/tabs. 
+ $count = 0; + $value = + preg_replace( '/,([ \t]*[}\]][^"\r\n]*([\r\n]|$)|[ \t]*[\r\n][ \t\r\n]*[}\]])/', '$1', + $value, - 1, $count ); + if ( $count > 0 ) { + $result = json_decode( $value, $assoc ); + if ( JSON_ERROR_NONE === json_last_error() ) { + // Report warning + $st = Status::newGood( $result ); + $st->warning( wfMessage( 'json-warn-trailing-comma' )->numParams( $count ) ); + return $st; + } + } + } + switch ( $code ) { case JSON_ERROR_NONE: return Status::newGood( $result ); diff --git a/languages/i18n/en.json b/languages/i18n/en.json index e8a5c3c1b5..7a1d2f56ae 100644 --- a/languages/i18n/en.json +++ b/languages/i18n/en.json @@ -3559,6 +3559,7 @@ "mediastatistics-header-text": "Textual", "mediastatistics-header-executable": "Executables", "mediastatistics-header-archive": "Compressed formats", + "json-warn-trailing-comma": "$1 trailing {{PLURAL:$1|comma was|commas were}} removed from JSON", "json-error-unknown": "There was a problem with the JSON. Error: $1", "json-error-depth": "The maximum stack depth has been exceeded", "json-error-state-mismatch": "Invalid or malformed JSON", diff --git a/languages/i18n/qqq.json b/languages/i18n/qqq.json index 2049ae517f..96e7face70 100644 --- a/languages/i18n/qqq.json +++ b/languages/i18n/qqq.json @@ -3721,6 +3721,7 @@ "mediastatistics-header-text": "Header on [[Special:MediaStatistics]] for file types that are in the text category. This includes simple text formats, including plain text formats, json, csv, and xml. Source code of compiled programming languages may be included here in the future, but isn't currently.", "mediastatistics-header-executable": "Header on [[Special:MediaStatistics]] for file types that are in the executable category. This includes things like source files for interpreted programming language (Shell scripts, javascript, etc).", "mediastatistics-header-archive": "Header on [[Special:MediaStatistics]] for file types that are in the archive category. 
Includes things like tar, zip, gzip etc.", + "json-warn-trailing-comma": "A warning message notifying that JSON text was automatically corrected by removing erroneous commas.\n\nParameters:\n* $1 - number of commas that were removed", "json-error-unknown": "User error message when there’s an unknown error.\n{{Identical|Unknown error}}. This error is shown if we received an unexpected value from PHP. See http://php.net/manual/en/function.json-last-error.php\n\nParameters:\n* $1 - integer error code", "json-error-depth": "User error message when the maximum stack depth is exceeded.\nSee http://php.net/manual/en/function.json-last-error.php", "json-error-state-mismatch": "User error message when underflow or the modes mismatch.\n\n'''Underflow''': A data-processing error arising when the absolute value of a computed quantity is smaller than the limits of precision of the computing device, retaining at least one significant digit.\nSee http://php.net/manual/en/function.json-last-error.php", diff --git a/tests/phpunit/includes/json/FormatJsonTest.php b/tests/phpunit/includes/json/FormatJsonTest.php index 0f1cdf7817..af68ab03ad 100644 --- a/tests/phpunit/includes/json/FormatJsonTest.php +++ b/tests/phpunit/includes/json/FormatJsonTest.php @@ -169,6 +169,42 @@ class FormatJsonTest extends MediaWikiTestCase { $this->assertEquals( $value, $st->getValue() ); } + public static function provideParseTryFixing() { + return array( + array( "[,]", '[]' ), + array( "[ , ]", '[]' ), + array( "[ , }", false ), + array( '[1],', false ), + array( "[1,]", '[1]' ), + array( "[1\n,]", '[1]' ), + array( "[1,\n]", '[1]' ), + array( "[1,]\n", '[1]' ), + array( "[1\n,\n]\n", '[1]' ), + array( '["a,",]', '["a,"]' ), + array( "[[1,]\n,[2,\n],[3\n,]]", '[[1],[2],[3]]' ), + array( '[[1,],[2,],[3,]]', false ), // I wish we could parse this, but would need quote parsing + array( '[1,,]', false ), + ); + } + + /** + * @dataProvider provideParseTryFixing + * @param string $value + * @param 
string|bool $expected + */ + public function testParseTryFixing( $value, $expected ) { + $st = FormatJson::parse( $value, FormatJson::TRY_FIXING ); + $this->assertType( 'Status', $st ); + if ( $expected === false ) { + $this->assertFalse( $st->isOK() ); + } else { + $this->assertFalse( $st->isGood() ); + $this->assertTrue( $st->isOK() ); + $val = FormatJson::encode( $st->getValue(), false, FormatJson::ALL_OK ); + $this->assertEquals( $expected, $val ); + } + } + public static function provideParseErrors() { return array( array( 'aaa' ), -- 2.20.1